Text Analytics
EntityDetector
- Python
- Scala
import os
from synapse.ml.services import *
textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("1", "Microsoft released Windows 10"),
("2", "In 1975, Bill Gates III and Paul Allen founded the company.")
], ["id", "text"])
entity = (EntityDetector()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguage("en")
.setOutputCol("replies")
.setErrorCol("error"))
entity.transform(df).show()
import com.microsoft.azure.synapse.ml.services.text.EntityDetector
import spark.implicits._
val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
("1", "Microsoft released Windows 10"),
("2", "In 1975, Bill Gates III and Paul Allen founded the company.")
).toDF("id", "text")
val entity = (new EntityDetector()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguage("en")
.setOutputCol("replies"))
entity.transform(df).show()
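The reply column is a nested struct mirroring the Text Analytics JSON response. A minimal Python sketch for inspecting it before writing any extraction logic (continues the Python example above and makes no assumptions about field names):
results = entity.transform(df)
results.printSchema()  # reveals the nested layout of the "replies" column
results.select("id", "replies").show(truncate=False)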
Python API: EntityDetector | Scala API: EntityDetector | Source: EntityDetector |
KeyPhraseExtractor
- Python
- Scala
import os
from synapse.ml.services import *
textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("en", "Hello world. This is some input text that I love."),
("fr", "Bonjour tout le monde"),
("es", "La carretera estaba atascada. Había mucho tráfico el día de ayer.")
], ["lang", "text"])
keyPhrase = (KeyPhraseExtractor()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguageCol("lang")
.setOutputCol("replies")
.setErrorCol("error"))
keyPhrase.transform(df).show()
import com.microsoft.azure.synapse.ml.services.text.KeyPhraseExtractor
import spark.implicits._
val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
("en", "Hello world. This is some input text that I love."),
("fr", "Bonjour tout le monde"),
("es", "La carretera estaba atascada. Había mucho tráfico el día de ayer."),
("en", null)
).toDF("lang", "text")
val keyPhrase = (new KeyPhraseExtractor()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguageCol("lang")
.setOutputCol("replies"))
keyPhrase.transform(df).show()
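Because setErrorCol is configured, failed requests land in the error column instead of failing the job. A short Python sketch that splits successes from failures (continues the Python example above):
from pyspark.sql.functions import col
results = keyPhrase.transform(df)
results.where(col("error").isNotNull()).show()  # rows whose request failed
results.where(col("error").isNull()).select("text", "replies").show()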
Python API: KeyPhraseExtractor | Scala API: KeyPhraseExtractor | Source: KeyPhraseExtractor |
LanguageDetector
- Python
- Scala
import os
from synapse.ml.services import *
textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("Hello World",),
("Bonjour tout le monde",),
("La carretera estaba atascada. Había mucho tráfico el día de ayer.",),
("你好",),
("こんにちは",),
(":) :( :D",)
], ["text",])
language = (LanguageDetector()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setTextCol("text")
.setOutputCol("language")
.setErrorCol("error"))
language.transform(df).show()
import com.microsoft.azure.synapse.ml.services.text.LanguageDetector
import spark.implicits._
val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"Hello World",
"Bonjour tout le monde",
"La carretera estaba atascada. Había mucho tráfico el día de ayer.",
":) :( :D"
).toDF("text")
val language = (new LanguageDetector()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setOutputCol("replies"))
language.transform(df).show()
Python API: LanguageDetector | Scala API: LanguageDetector | Source: LanguageDetector |
NER
- Python
- Scala
import os
from synapse.ml.services import *
textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("1", "en", "I had a wonderful trip to Seattle last week."),
("2", "en", "I visited Space Needle 2 times.")
], ["id", "language", "text"])
ner = (NER()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguageCol("language")
.setOutputCol("replies")
.setErrorCol("error"))
ner.transform(df).show()
import com.microsoft.azure.synapse.ml.services.text.NER
import spark.implicits._
val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
("1", "en", "I had a wonderful trip to Seattle last week."),
("2", "en", "I visited Space Needle 2 times.")
).toDF("id", "language", "text")
val ner = (new NER()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguage("en")
.setOutputCol("response"))
ner.transform(df).show()
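To get one row per recognized entity, the nested reply can be exploded. A hypothetical Python sketch: the replies.document.entities path follows the Text Analytics response but may differ by API version, so confirm it with printSchema() first.
from pyspark.sql.functions import col, explode
(ner.transform(df)
    .select(explode(col("replies.document.entities")).alias("entity"))
    .select("entity.text", "entity.category")).show()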
Python API: NER | Scala API: NER | Source: NER |
PII
- Python
- Scala
import os
from synapse.ml.services import *
textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("1", "en", "My SSN is 859-98-0987"),
("2", "en",
"Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check."),
("3", "en", "Is 998.214.865-68 your Brazilian CPF number?")
], ["id", "language", "text"])
pii = (PII()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguage("en")
.setOutputCol("response"))
pii.transform(df).show()
import com.microsoft.azure.synapse.ml.services.text.PII
import spark.implicits._
val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
("1", "en", "My SSN is 859-98-0987"),
("2", "en",
"Your ABA number - 111000025 - is the first 9 digits in the lower left hand corner of your personal check."),
("3", "en", "Is 998.214.865-68 your Brazilian CPF number?")
).toDF("id", "language", "text")
val pii = (new PII()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguage("en")
.setOutputCol("response"))
pii.transform(df).show()
Python API: PII | Scala API: PII | Source: PII |
TextSentiment
- Python
- Scala
import os
from synapse.ml.services import *
textKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("I am so happy today, its sunny!", "en-US"),
("I am frustrated by this rush hour traffic", "en-US"),
("The cognitive services on spark aint bad", "en-US"),
], ["text", "language"])
sentiment = (TextSentiment()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setTextCol("text")
.setOutputCol("sentiment")
.setErrorCol("error")
.setLanguageCol("language"))
sentiment.transform(df).show()
import com.microsoft.azure.synapse.ml.services.text.TextSentiment
import spark.implicits._
val textKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
("en", "Hello world. This is some input text that I love."),
("fr", "Bonjour tout le monde"),
("es", "La carretera estaba atascada. Había mucho tráfico el día de ayer."),
(null, "ich bin ein berliner"),
(null, null),
("en", null)
).toDF("lang", "text")
val sentiment = (new TextSentiment()
.setSubscriptionKey(textKey)
.setLocation("eastus")
.setLanguageCol("lang")
.setModelVersion("latest")
.setShowStats(true)
.setOutputCol("replies"))
sentiment.transform(df).show()
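A hypothetical Python selection of just the per-document label, assuming the response nests it under document.sentiment (verify against the actual schema for your model version):
from pyspark.sql.functions import col
(sentiment.transform(df)
    .select("text", col("sentiment.document.sentiment").alias("label"))).show()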
Python API: TextSentiment | Scala API: TextSentiment | Source: TextSentiment |
Translator
Translate
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, flatten
translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = spark.createDataFrame([
(["Hello, what is your name?", "Bye"],)
], ["text",])
translate = (Translate()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setTextCol("text")
.setToLanguage(["zh-Hans", "fr"])
.setOutputCol("translation")
.setConcurrency(5))
(translate
.transform(df)
.withColumn("translation", flatten(col("translation.translations")))
.withColumn("translation", col("translation.text"))
.select("translation")).show()
import com.microsoft.azure.synapse.ml.services.translate.Translate
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}
val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", "")
val df = Seq(List("Hello, what is your name?", "Bye")).toDF("text")
val translate = (new Translate()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setTextCol("text")
.setToLanguage(Seq("zh-Hans", "fr"))
.setOutputCol("translation")
.setConcurrency(5))
(translate
.transform(df)
.withColumn("translation", flatten(col("translation.translations")))
.withColumn("translation", col("translation.text"))
.select("translation")).show()
Python API: Translate | Scala API: Translate | Source: Translate |
Transliterate
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col
translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = spark.createDataFrame([
(["こんにちは", "さようなら"],)
], ["text",])
transliterate = (Transliterate()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setLanguage("ja")
.setFromScript("Jpan")
.setToScript("Latn")
.setTextCol("text")
.setOutputCol("result"))
(transliterate
.transform(df)
.withColumn("text", col("result.text"))
.withColumn("script", col("result.script"))
.select("text", "script")).show()
import com.microsoft.azure.synapse.ml.services.translate.Transliterate
import spark.implicits._
import org.apache.spark.sql.functions.col
val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", "")
val df = Seq(List("こんにちは", "さようなら")).toDF("text")
val transliterate = (new Transliterate()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setLanguage("ja")
.setFromScript("Jpan")
.setToScript("Latn")
.setTextCol("text")
.setOutputCol("result"))
(transliterate
.transform(df)
.withColumn("text", col("result.text"))
.withColumn("script", col("result.script"))
.select("text", "script")).show()
Python API: Transliterate | Scala API: Transliterate | Source: Transliterate |
Detect
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col
translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = spark.createDataFrame([
(["Hello, what is your name?"],)
], ["text",])
detect = (Detect()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setTextCol("text")
.setOutputCol("result"))
(detect
.transform(df)
.withColumn("language", col("result.language"))
.select("language")).show()
import com.microsoft.azure.synapse.ml.services.translate.Detect
import spark.implicits._
import org.apache.spark.sql.functions.col
val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", "")
val df = Seq(List("Hello, what is your name?")).toDF("text")
val detect = (new Detect()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setTextCol("text")
.setOutputCol("result"))
(detect
.transform(df)
.withColumn("language", col("result.language"))
.select("language")).show()
Python API: Detect | Scala API: Detect | Source: Detect |
BreakSentence
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, flatten
translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = spark.createDataFrame([
(["Hello, what is your name?"],)
], ["text",])
breakSentence = (BreakSentence()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setTextCol("text")
.setOutputCol("result"))
(breakSentence
.transform(df)
.withColumn("sentLen", flatten(col("result.sentLen")))
.select("sentLen")).show()
import com.microsoft.azure.synapse.ml.services.translate.BreakSentence
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}
val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", "")
val df = Seq(List("Hello, what is your name?")).toDF("text")
val breakSentence = (new BreakSentence()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setTextCol("text")
.setOutputCol("result"))
(breakSentence
.transform(df)
.withColumn("sentLen", flatten(col("result.sentLen")))
.select("sentLen")).show()
Python API: BreakSentence | Scala API: BreakSentence | Source: BreakSentence |
DictionaryLookup
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, flatten
translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = spark.createDataFrame([
(["fly"],)
], ["text",])
dictionaryLookup = (DictionaryLookup()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setFromLanguage("en")
.setToLanguage("es")
.setTextCol("text")
.setOutputCol("result"))
(dictionaryLookup
.transform(df)
.withColumn("translations", flatten(col("result.translations")))
.withColumn("normalizedTarget", col("translations.normalizedTarget"))
.select("normalizedTarget")).show()
import com.microsoft.azure.synapse.ml.services.translate.DictionaryLookup
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}
val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", "")
val df = Seq(List("fly")).toDF("text")
val dictionaryLookup = (new DictionaryLookup()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setFromLanguage("en")
.setToLanguage("es")
.setTextCol("text")
.setOutputCol("result"))
(dictionaryLookup
.transform(df)
.withColumn("translations", flatten(col("result.translations")))
.withColumn("normalizedTarget", col("translations.normalizedTarget"))
.select("normalizedTarget")).show()
Python API: DictionaryLookup | Scala API: DictionaryLookup | Source: DictionaryLookup |
DictionaryExamples
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import array, col, flatten, struct
translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
df = (spark.createDataFrame([
("fly", "volar")
], ["text", "translation"])
.withColumn("textAndTranslation", array(struct(col("text"), col("translation")))))
dictionaryExamples = (DictionaryExamples()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setFromLanguage("en")
.setToLanguage("es")
.setTextAndTranslationCol("textAndTranslation")
.setOutputCol("result"))
(dictionaryExamples
.transform(df)
.withColumn("examples", flatten(col("result.examples")))
.select("examples")).show()
import com.microsoft.azure.synapse.ml.services.translate.{DictionaryExamples, TextAndTranslation}
import spark.implicits._
import org.apache.spark.sql.functions.{col, flatten}
val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", "")
val df = Seq(List(TextAndTranslation("fly", "volar"))).toDF("textAndTranslation")
val dictionaryExamples = (new DictionaryExamples()
.setSubscriptionKey(translatorKey)
.setLocation("eastus")
.setFromLanguage("en")
.setToLanguage("es")
.setTextAndTranslationCol("textAndTranslation")
.setOutputCol("result"))
(dictionaryExamples
.transform(df)
.withColumn("examples", flatten(col("result.examples")))
.select("examples")).show()
Python API: DictionaryExamples | Scala API: DictionaryExamples | Source: DictionaryExamples |
DocumentTranslator
- Python
- Scala
import os
from synapse.ml.services import *
translatorKey = os.environ.get("TRANSLATOR_KEY", getSecret("translator-key"))
translatorName = os.environ.get("TRANSLATOR_NAME", "mmlspark-translator")
documentTranslator = (DocumentTranslator()
.setSubscriptionKey(translatorKey)
.setServiceName(translatorName)
.setSourceUrlCol("sourceUrl")
.setTargetsCol("targets")
.setOutputCol("translationStatus"))
import com.microsoft.azure.synapse.ml.services.translate.DocumentTranslator
import spark.implicits._
val translatorKey = sys.env.getOrElse("TRANSLATOR_KEY", "")
val translatorName = sys.env.getOrElse("TRANSLATOR_NAME", "mmlspark-translator")
val documentTranslator = (new DocumentTranslator()
.setSubscriptionKey(translatorKey)
.setServiceName(translatorName)
.setSourceUrlCol("sourceUrl")
.setTargetsCol("targets")
.setOutputCol("translationStatus"))
Python API: DocumentTranslator | Scala API: DocumentTranslator | Source: DocumentTranslator |
Computer Vision
OCR
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", ),
], ["url", ])
ocr = (OCR()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setDetectOrientation(True)
.setOutputCol("ocr"))
ocr.transform(df).show()
import com.microsoft.azure.synapse.ml.services.vision.OCR
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg"
).toDF("url")
val ocr = (new OCR()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setDetectOrientation(true)
.setOutputCol("ocr"))
ocr.transform(df).show()
Python API: OCR | Scala API: OCR | Source: OCR |
AnalyzeImage
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", None),
("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", "en")
], ["image", "language"])
ai = (AnalyzeImage()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("image")
.setLanguageCol("language")
.setVisualFeatures(["Categories", "Tags", "Description", "Faces", "ImageType", "Color", "Adult", "Objects", "Brands"])
.setDetails(["Celebrities", "Landmarks"])
.setOutputCol("features"))
ai.transform(df).show()
import com.microsoft.azure.synapse.ml.services.vision.AnalyzeImage
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", "en"),
("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", null),
("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", "en")
).toDF("url", "language")
val ai = (new AnalyzeImage()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setLanguageCol("language")
.setVisualFeatures(Seq("Categories", "Tags", "Description", "Faces", "ImageType", "Color", "Adult", "Objects", "Brands"))
.setDetails(Seq("Celebrities", "Landmarks"))
.setOutputCol("features"))
ai.transform(df).select("url", "features").show()
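The features column is a large struct; a Python sketch that picks out only the auto-generated caption, assuming the standard Analyze response layout where description.captions is an array of caption structs:
from pyspark.sql.functions import col
(ai.transform(df)
    .select("image", col("features.description.captions.text").alias("captions"))).show(truncate=False)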
Python API: AnalyzeImage | Scala API: AnalyzeImage | Source: AnalyzeImage |
RecognizeText
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", ),
("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", ),
("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", )
], ["url", ])
rt = (RecognizeText()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setMode("Printed")
.setOutputCol("ocr")
.setConcurrency(5))
rt.transform(df).show()
import com.microsoft.azure.synapse.ml.services.vision.RecognizeText
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",
"https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png",
"https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png"
).toDF("url")
val rt = (new RecognizeText()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setMode("Printed")
.setOutputCol("ocr")
.setConcurrency(5))
rt.transform(df).show()
Python API: RecognizeText | Scala API: RecognizeText | Source: RecognizeText |
ReadImage
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg", ),
("https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png", ),
("https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png", )
], ["url", ])
ri = (ReadImage()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("ocr")
.setConcurrency(5))
ri.transform(df).show()
import com.microsoft.azure.synapse.ml.services.vision.ReadImage
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/OCR/test1.jpg",
"https://mmlspark.blob.core.windows.net/datasets/OCR/test2.png",
"https://mmlspark.blob.core.windows.net/datasets/OCR/test3.png"
).toDF("url")
val ri = (new ReadImage()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("ocr")
.setConcurrency(5))
ri.transform(df).show()
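ReadImage returns the Read analyze result with one element per page and the lines nested inside. A Python sketch that flattens out the recognized text, assuming the v3 Read schema (analyzeResult.readResults[*].lines[*].text); check printSchema() if your API version differs.
from pyspark.sql.functions import col, flatten
(ri.transform(df)
    .withColumn("lines", flatten(col("ocr.analyzeResult.readResults.lines")))
    .select("url", col("lines.text").alias("text"))).show(truncate=False)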
Python API: ReadImage | Scala API: ReadImage | Source: ReadImage |
RecognizeDomainSpecificContent
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg", )
], ["url", ])
celeb = (RecognizeDomainSpecificContent()
.setSubscriptionKey(cognitiveKey)
.setModel("celebrities")
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("celebs"))
celeb.transform(df).show()
import com.microsoft.azure.synapse.ml.services.vision.RecognizeDomainSpecificContent
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
).toDF("url")
val celeb = (new RecognizeDomainSpecificContent()
.setSubscriptionKey(cognitiveKey)
.setModel("celebrities")
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("celebs"))
celeb.transform(df).show()
Python API: RecognizeDomainSpecificContent | Scala API: RecognizeDomainSpecificContent | Source: RecognizeDomainSpecificContent |
GenerateThumbnails
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg", )
], ["url", ])
gt = (GenerateThumbnails()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setHeight(50)
.setWidth(50)
.setSmartCropping(True)
.setImageUrlCol("url")
.setOutputCol("thumbnails"))
gt.transform(df).show()
import com.microsoft.azure.synapse.ml.services.vision.GenerateThumbnails
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")
val gt = (new GenerateThumbnails()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setHeight(50)
.setWidth(50)
.setSmartCropping(true)
.setImageUrlCol("url")
.setOutputCol("thumbnails"))
gt.transform(df).show()
Python API: GenerateThumbnails | Scala API: GenerateThumbnails | Source: GenerateThumbnails |
TagImage
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg", )
], ["url", ])
ti = (TagImage()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("tags"))
ti.transform(df).show()
import com.microsoft.azure.synapse.ml.services.vision.TagImage
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")
val ti = (new TagImage()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("tags"))
ti.transform(df).show()
Python API: TagImage | Scala API: TagImage | Source: TagImage |
DescribeImage
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg", )
], ["url", ])
di = (DescribeImage()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setMaxCandidates(3)
.setImageUrlCol("url")
.setOutputCol("descriptions"))
di.transform(df).show()
import com.microsoft.azure.synapse.ml.services.vision.DescribeImage
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg"
).toDF("url")
val di = (new DescribeImage()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setMaxCandidates(3)
.setImageUrlCol("url")
.setOutputCol("descriptions"))
di.transform(df).show()
Python API: DescribeImage | Scala API: DescribeImage | Source: DescribeImage |
Form Recognizer
AnalyzeLayout
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, flatten
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg",)
], ["source",])
analyzeLayout = (AnalyzeLayout()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("layout")
.setConcurrency(5))
(analyzeLayout.transform(imageDf)
.withColumn("lines", flatten(col("layout.analyzeResult.readResults.lines")))
.withColumn("readLayout", col("lines.text"))
.withColumn("tables", flatten(col("layout.analyzeResult.pageResults.tables")))
.withColumn("cells", flatten(col("tables.cells")))
.withColumn("pageLayout", col("cells.text"))
.select("source", "readLayout", "pageLayout")).show()
import com.microsoft.azure.synapse.ml.services.form.AnalyzeLayout
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val imageDf = Seq(
"https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg"
).toDF("source")
val analyzeLayout = (new AnalyzeLayout()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("layout")
.setConcurrency(5))
analyzeLayout.transform(imageDf).show()
Python API: AnalyzeLayout | Scala API: AnalyzeLayout | Source: AnalyzeLayout |
AnalyzeReceipts
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",),
("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",)
], ["image",])
analyzeReceipts = (AnalyzeReceipts()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("image")
.setOutputCol("receipts")
.setConcurrency(5))
analyzeReceipts.transform(imageDf).show()
import com.microsoft.azure.synapse.ml.services.form.AnalyzeReceipts
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val imageDf = Seq(
"https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png",
"https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/receipt1.png"
).toDF("source")
val analyzeReceipts = (new AnalyzeReceipts()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("receipts")
.setConcurrency(5))
analyzeReceipts.transform(imageDf).show()
Python API: AnalyzeReceipts | Scala API: AnalyzeReceipts | Source: AnalyzeReceipts |
AnalyzeBusinessCards
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/business_card.jpg",)
], ["source",])
analyzeBusinessCards = (AnalyzeBusinessCards()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("businessCards")
.setConcurrency(5))
analyzeBusinessCards.transform(imageDf).show()
import com.microsoft.azure.synapse.ml.services.form.AnalyzeBusinessCards
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val imageDf = Seq(
"https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/business_card.jpg"
).toDF("source")
val analyzeBusinessCards = (new AnalyzeBusinessCards()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("businessCards")
.setConcurrency(5))
analyzeBusinessCards.transform(imageDf).show()
Python API: AnalyzeBusinessCards | Scala API: AnalyzeBusinessCards | Source: AnalyzeBusinessCards |
AnalyzeInvoices
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, explode
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png",)
], ["source",])
analyzeInvoices = (AnalyzeInvoices()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("invoices")
.setConcurrency(5))
(analyzeInvoices
.transform(imageDf)
.withColumn("documents", explode(col("invoices.analyzeResult.documentResults.fields")))
.select("source", "documents")).show()
import com.microsoft.azure.synapse.ml.services.form.AnalyzeInvoices
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val imageDf = Seq(
"https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png"
).toDF("source")
val analyzeInvoices = (new AnalyzeInvoices()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("invoices")
.setConcurrency(5))
analyzeInvoices.transform(imageDf).show()
Python API: AnalyzeInvoices | Scala API: AnalyzeInvoices | Source: AnalyzeInvoices |
AnalyzeIDDocuments
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, explode
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/id1.jpg",)
], ["source",])
analyzeIDDocuments = (AnalyzeIDDocuments()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("ids")
.setConcurrency(5))
(analyzeIDDocuments
.transform(imageDf)
.withColumn("documents", explode(col("ids.analyzeResult.documentResults.fields")))
.select("source", "documents")).show()
import com.microsoft.azure.synapse.ml.services.form.AnalyzeIDDocuments
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val imageDf = Seq(
"https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/id1.jpg"
).toDF("source")
val analyzeIDDocuments = (new AnalyzeIDDocuments()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("ids")
.setConcurrency(5))
analyzeIDDocuments.transform(imageDf).show()
Python API: AnalyzeIDDocuments | Scala API: AnalyzeIDDocuments | Source: AnalyzeIDDocuments |
AnalyzeCustomModel
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, create_map, flatten, lit
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" # put your own modelId here
imageDf = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png",)
], ["source",])
analyzeCustomModel = (AnalyzeCustomModel()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setModelId(modelId)
.setImageUrlCol("source")
.setOutputCol("output")
.setConcurrency(5))
(analyzeCustomModel
.transform(imageDf)
.withColumn("keyValuePairs", flatten(col("output.analyzeResult.pageResults.keyValuePairs")))
.withColumn("keys", col("keyValuePairs.key.text"))
.withColumn("values", col("keyValuePairs.value.text"))
.withColumn("keyValuePairs", create_map(lit("key"), col("keys"), lit("value"), col("values")))
.select("source", "keyValuePairs")).show()
import com.microsoft.azure.synapse.ml.services.form.AnalyzeCustomModel
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" // put your own modelId here
val imageDf = Seq(
"https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/invoice2.png"
).toDF("source")
val analyzeCustomModel = (new AnalyzeCustomModel()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setModelId(modelId)
.setImageUrlCol("source")
.setOutputCol("output")
.setConcurrency(5))
analyzeCustomModel.transform(imageDf).show()
Python API: AnalyzeCustomModel | Scala API: AnalyzeCustomModel | Source: AnalyzeCustomModel |
GetCustomModel
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" # put your own modelId here
emptyDf = spark.createDataFrame([("",)])
getCustomModel = (GetCustomModel()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setModelId(modelId)
.setIncludeKeys(True)
.setOutputCol("model")
.setConcurrency(5))
(getCustomModel
.transform(emptyDf)
.withColumn("modelInfo", col("model.ModelInfo"))
.withColumn("trainResult", col("model.TrainResult"))
.select("modelInfo", "trainResult")).show()
import com.microsoft.azure.synapse.ml.services.form.GetCustomModel
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val modelId = "02bc2f58-2beb-4ae3-84fb-08f011b2f7b8" // put your own modelId here
val emptyDf = Seq("").toDF()
val getCustomModel = (new GetCustomModel()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setModelId(modelId)
.setIncludeKeys(true)
.setOutputCol("model")
.setConcurrency(5))
getCustomModel.transform(emptyDf).show()
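GetCustomModel is a management call, so the input rows carry no payload; the single dummy row exists only to trigger one request. A Python sketch: printing the schema is a safe way to discover the metadata fields before selecting them.
result = getCustomModel.transform(emptyDf)
result.printSchema()  # reveals the nested model info / train result layout
result.select("model").show(truncate=False)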
Python API: GetCustomModel | Scala API: GetCustomModel | Source: GetCustomModel |
ListCustomModels
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
emptyDf = spark.createDataFrame([("",)])
listCustomModels = (ListCustomModels()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOp("full")
.setOutputCol("models")
.setConcurrency(5))
(listCustomModels
.transform(emptyDf)
.withColumn("modelIds", col("models.modelList.modelId"))
.select("modelIds")).show()
import com.microsoft.azure.synapse.ml.services.form.ListCustomModels
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val emptyDf = Seq("").toDF()
val listCustomModels = (new ListCustomModels()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOp("full")
.setOutputCol("models")
.setConcurrency(5))
listCustomModels.transform(emptyDf).show()
Python API: ListCustomModels | Scala API: ListCustomModels | Source: ListCustomModels |
Form Recognizer V3
AnalyzeDocument
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, flatten
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
imageDf = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg",)
], ["source",])
analyzeDocument = (AnalyzeDocument()
# For the list of supported prebuilt models, see the documentation
.setPrebuiltModelId("prebuilt-layout")
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("result")
.setConcurrency(5))
(analyzeDocument.transform(imageDf)
.withColumn("content", col("result.analyzeResult.content"))
.withColumn("cells", flatten(col("result.analyzeResult.tables.cells")))
.withColumn("cells", col("cells.content"))
.select("source", "result", "content", "cells")).show()
import com.microsoft.azure.synapse.ml.services.form.AnalyzeDocument
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val imageDf = Seq(
"https://mmlspark.blob.core.windows.net/datasets/FormRecognizer/layout1.jpg"
).toDF("source")
val analyzeDocument = (new AnalyzeDocument()
.setPrebuiltModelId("prebuilt-layout")
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("source")
.setOutputCol("result")
.setConcurrency(5))
analyzeDocument.transform(imageDf).show()
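Switching prebuilt models is just a parameter change, so the same transformer can be reused. A Python sketch for invoices (model ids such as prebuilt-invoice, prebuilt-receipt, and prebuilt-read come from the Form Recognizer v3 documentation):
invoiceAnalyzer = analyzeDocument.setPrebuiltModelId("prebuilt-invoice")
invoiceAnalyzer.transform(imageDf).show()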
Python API: AnalyzeDocument | Scala API: AnalyzeDocument | Source: AnalyzeDocument |
Anomaly Detection
DetectLastAnomaly
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, collect_list, lit, sort_array, struct
anomalyKey = os.environ.get("ANOMALY_API_KEY", getSecret("anomaly-api-key"))
df = (spark.createDataFrame([
("1972-01-01T00:00:00Z", 826.0),
("1972-02-01T00:00:00Z", 799.0),
("1972-03-01T00:00:00Z", 890.0),
("1972-04-01T00:00:00Z", 900.0),
("1972-05-01T00:00:00Z", 766.0),
("1972-06-01T00:00:00Z", 805.0),
("1972-07-01T00:00:00Z", 821.0),
("1972-08-01T00:00:00Z", 20000.0),
("1972-09-01T00:00:00Z", 883.0),
("1972-10-01T00:00:00Z", 898.0),
("1972-11-01T00:00:00Z", 957.0),
("1972-12-01T00:00:00Z", 924.0),
("1973-01-01T00:00:00Z", 881.0),
("1973-02-01T00:00:00Z", 837.0),
("1973-03-01T00:00:00Z", 90000.0)
], ["timestamp", "value"])
.withColumn("group", lit(1))
.withColumn("inputs", struct(col("timestamp"), col("value")))
.groupBy(col("group"))
.agg(sort_array(collect_list(col("inputs"))).alias("inputs")))
dla = (DetectLastAnomaly()
.setSubscriptionKey(anomalyKey)
.setLocation("westus2")
.setOutputCol("anomalies")
.setSeriesCol("inputs")
.setGranularity("monthly")
.setErrorCol("errors"))
dla.transform(df).show()
import com.microsoft.azure.synapse.ml.services.anomaly.DetectLastAnomaly
import spark.implicits._
import org.apache.spark.sql.functions.{col, collect_list, lit, sort_array, struct}
val anomalyKey = sys.env.getOrElse("ANOMALY_API_KEY", "")
val df = (Seq(
("1972-01-01T00:00:00Z", 826.0),
("1972-02-01T00:00:00Z", 799.0),
("1972-03-01T00:00:00Z", 890.0),
("1972-04-01T00:00:00Z", 900.0),
("1972-05-01T00:00:00Z", 766.0),
("1972-06-01T00:00:00Z", 805.0),
("1972-07-01T00:00:00Z", 821.0),
("1972-08-01T00:00:00Z", 20000.0),
("1972-09-01T00:00:00Z", 883.0),
("1972-10-01T00:00:00Z", 898.0),
("1972-11-01T00:00:00Z", 957.0),
("1972-12-01T00:00:00Z", 924.0),
("1973-01-01T00:00:00Z", 881.0),
("1973-02-01T00:00:00Z", 837.0),
("1973-03-01T00:00:00Z", 90000.0)
).toDF("timestamp", "value")
.withColumn("group", lit(1))
.withColumn("inputs", struct(col("timestamp"), col("value")))
.groupBy(col("group"))
.agg(sort_array(collect_list(col("inputs"))).alias("inputs")))
val dla = (new DetectLastAnomaly()
.setSubscriptionKey(anomalyKey)
.setLocation("westus2")
.setOutputCol("anomalies")
.setSeriesCol("inputs")
.setGranularity("monthly")
.setErrorCol("errors"))
dla.transform(df).show()
Python API: DetectLastAnomaly | Scala API: DetectLastAnomaly | Source: DetectLastAnomaly |
DetectAnomalies
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, collect_list, lit, sort_array, struct
anomalyKey = os.environ.get("ANOMALY_API_KEY", getSecret("anomaly-api-key"))
df = (spark.createDataFrame([
("1972-01-01T00:00:00Z", 826.0),
("1972-02-01T00:00:00Z", 799.0),
("1972-03-01T00:00:00Z", 890.0),
("1972-04-01T00:00:00Z", 900.0),
("1972-05-01T00:00:00Z", 766.0),
("1972-06-01T00:00:00Z", 805.0),
("1972-07-01T00:00:00Z", 821.0),
("1972-08-01T00:00:00Z", 20000.0),
("1972-09-01T00:00:00Z", 883.0),
("1972-10-01T00:00:00Z", 898.0),
("1972-11-01T00:00:00Z", 957.0),
("1972-12-01T00:00:00Z", 924.0),
("1973-01-01T00:00:00Z", 881.0),
("1973-02-01T00:00:00Z", 837.0),
("1973-03-01T00:00:00Z", 90000.0)
], ["timestamp", "value"])
.withColumn("group", lit(1))
.withColumn("inputs", struct(col("timestamp"), col("value")))
.groupBy(col("group"))
.agg(sort_array(collect_list(col("inputs"))).alias("inputs")))
da = (DetectAnomalies()
.setSubscriptionKey(anomalyKey)
.setLocation("westus2")
.setOutputCol("anomalies")
.setSeriesCol("inputs")
.setGranularity("monthly"))
da.transform(df).show()
import com.microsoft.azure.synapse.ml.services.anomaly.DetectAnomalies
import spark.implicits._
val anomalyKey = sys.env.getOrElse("ANOMALY_API_KEY", "")
val df = (Seq(
("1972-01-01T00:00:00Z", 826.0),
("1972-02-01T00:00:00Z", 799.0),
("1972-03-01T00:00:00Z", 890.0),
("1972-04-01T00:00:00Z", 900.0),
("1972-05-01T00:00:00Z", 766.0),
("1972-06-01T00:00:00Z", 805.0),
("1972-07-01T00:00:00Z", 821.0),
("1972-08-01T00:00:00Z", 20000.0),
("1972-09-01T00:00:00Z", 883.0),
("1972-10-01T00:00:00Z", 898.0),
("1972-11-01T00:00:00Z", 957.0),
("1972-12-01T00:00:00Z", 924.0),
("1973-01-01T00:00:00Z", 881.0),
("1973-02-01T00:00:00Z", 837.0),
("1973-03-01T00:00:00Z", 90000.0)
).toDF("timestamp", "value")
.withColumn("group", lit(1))
.withColumn("inputs", struct(col("timestamp"), col("value")))
.groupBy(col("group"))
.agg(sort_array(collect_list(col("inputs"))).alias("inputs")))
val da = (new DetectAnomalies()
.setSubscriptionKey(anomalyKey)
.setLocation("westus2")
.setOutputCol("anomalies")
.setSeriesCol("inputs")
.setGranularity("monthly"))
da.transform(df).show()
Python API: DetectAnomalies | Scala API: DetectAnomalies | Source: DetectAnomalies |
SimpleDetectAnomalies
- Python
- Scala
import os
from synapse.ml.services import *
anomalyKey = os.environ.get("ANOMALY_API_KEY", getSecret("anomaly-api-key"))
df = (spark.createDataFrame([
("1972-01-01T00:00:00Z", 826.0, 1.0),
("1972-02-01T00:00:00Z", 799.0, 1.0),
("1972-03-01T00:00:00Z", 890.0, 1.0),
("1972-04-01T00:00:00Z", 900.0, 1.0),
("1972-05-01T00:00:00Z", 766.0, 1.0),
("1972-06-01T00:00:00Z", 805.0, 1.0),
("1972-07-01T00:00:00Z", 821.0, 1.0),
("1972-08-01T00:00:00Z", 20000.0, 1.0),
("1972-09-01T00:00:00Z", 883.0, 1.0),
("1972-10-01T00:00:00Z", 898.0, 1.0),
("1972-11-01T00:00:00Z", 957.0, 1.0),
("1972-12-01T00:00:00Z", 924.0, 1.0),
("1973-01-01T00:00:00Z", 881.0, 1.0),
("1973-02-01T00:00:00Z", 837.0, 1.0),
("1973-03-01T00:00:00Z", 90000.0, 1.0),
("1972-01-01T00:00:00Z", 826.0, 2.0),
("1972-02-01T00:00:00Z", 799.0, 2.0),
("1972-03-01T00:00:00Z", 890.0, 2.0),
("1972-04-01T00:00:00Z", 900.0, 2.0),
("1972-05-01T00:00:00Z", 766.0, 2.0),
("1972-06-01T00:00:00Z", 805.0, 2.0),
("1972-07-01T00:00:00Z", 821.0, 2.0),
("1972-08-01T00:00:00Z", 20000.0, 2.0),
("1972-09-01T00:00:00Z", 883.0, 2.0),
("1972-10-01T00:00:00Z", 898.0, 2.0),
("1972-11-01T00:00:00Z", 957.0, 2.0),
("1972-12-01T00:00:00Z", 924.0, 2.0),
("1973-01-01T00:00:00Z", 881.0, 2.0),
("1973-02-01T00:00:00Z", 837.0, 2.0),
("1973-03-01T00:00:00Z", 90000.0, 2.0)
], ["timestamp", "value", "group"]))
sda = (SimpleDetectAnomalies()
.setSubscriptionKey(anomalyKey)
.setLocation("westus2")
.setOutputCol("anomalies")
.setGroupbyCol("group")
.setGranularity("monthly"))
sda.transform(df).show()
import com.microsoft.azure.synapse.ml.services.anomaly.SimpleDetectAnomalies
import spark.implicits._
val anomalyKey = sys.env.getOrElse("ANOMALY_API_KEY", "")
val baseSeq = Seq(
("1972-01-01T00:00:00Z", 826.0),
("1972-02-01T00:00:00Z", 799.0),
("1972-03-01T00:00:00Z", 890.0),
("1972-04-01T00:00:00Z", 900.0),
("1972-05-01T00:00:00Z", 766.0),
("1972-06-01T00:00:00Z", 805.0),
("1972-07-01T00:00:00Z", 821.0),
("1972-08-01T00:00:00Z", 20000.0),
("1972-09-01T00:00:00Z", 883.0),
("1972-10-01T00:00:00Z", 898.0),
("1972-11-01T00:00:00Z", 957.0),
("1972-12-01T00:00:00Z", 924.0),
("1973-01-01T00:00:00Z", 881.0),
("1973-02-01T00:00:00Z", 837.0),
("1973-03-01T00:00:00Z", 9000.0)
)
val df = (baseSeq.map(p => (p._1, p._2, 1.0))
.++(baseSeq.map(p => (p._1, p._2, 2.0)))
.toDF("timestamp", "value", "group"))
val sda = (new SimpleDetectAnomalies()
.setSubscriptionKey(anomalyKey)
.setLocation("westus2")
.setOutputCol("anomalies")
.setGroupbyCol("group")
.setGranularity("monthly"))
sda.transform(df).show()
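Unlike DetectAnomalies, SimpleDetectAnomalies assembles the per-group series itself from the group-by column, so the collect_list/sort_array step is not needed. A hypothetical Python read of the per-point flag, assuming the response exposes isAnomaly (confirm with printSchema()):
from pyspark.sql.functions import col
(sda.transform(df)
    .select("timestamp", "value", "group", col("anomalies.isAnomaly"))).show()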
Python API: SimpleDetectAnomalies | Scala API: SimpleDetectAnomalies | Source: SimpleDetectAnomalies |
Face
DetectFace
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",),
], ["url"])
face = (DetectFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("detected_faces")
.setReturnFaceId(True)
.setReturnFaceLandmarks(False)
.setReturnFaceAttributes(["age", "gender", "headPose", "smile", "facialHair", "glasses", "emotion",
"hair", "makeup", "occlusion", "accessories", "blur", "exposure", "noise"]))
face.transform(df).show()
import com.microsoft.azure.synapse.ml.services.face.DetectFace
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg"
).toDF("url")
val face = (new DetectFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("face")
.setReturnFaceId(true)
.setReturnFaceLandmarks(true)
.setReturnFaceAttributes(Seq(
"age", "gender", "headPose", "smile", "facialHair", "glasses", "emotion",
"hair", "makeup", "occlusion", "accessories", "blur", "exposure", "noise")))
face.transform(df).show()
Python API: DetectFace | Scala API: DetectFace | Source: DetectFace |
FindSimilarFace
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",),
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",),
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg",)
], ["url"])
detector = (DetectFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("detected_faces")
.setReturnFaceId(True)
.setReturnFaceLandmarks(False)
.setReturnFaceAttributes([]))
faceIdDF = detector.transform(df).select("detected_faces").select(col("detected_faces").getItem(0).getItem("faceId").alias("id"))
faceIds = [row.asDict()['id'] for row in faceIdDF.collect()]
findSimilar = (FindSimilarFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("similar")
.setFaceIdCol("id")
.setFaceIds(faceIds))
findSimilar.transform(faceIdDF).show()
import com.microsoft.azure.synapse.ml.services.face.{DetectFace, FindSimilarFace}
import spark.implicits._
import org.apache.spark.sql.functions.col
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg"
).toDF("url")
val detector = (new DetectFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("detected_faces")
.setReturnFaceId(true)
.setReturnFaceLandmarks(false)
.setReturnFaceAttributes(Seq()))
val faceIdDF = (detector.transform(df)
.select(col("detected_faces").getItem(0).getItem("faceId").alias("id"))
.cache())
val faceIds = faceIdDF.collect().map(row => row.getAs[String]("id"))
val findSimilar = (new FindSimilarFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("similar")
.setFaceIdCol("id")
.setFaceIds(faceIds))
findSimilar.transform(faceIdDF).show()
Python API: FindSimilarFace | Scala API: FindSimilarFace | Source: FindSimilarFace |
GroupFaces
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",),
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",),
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg",)
], ["url"])
detector = (DetectFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("detected_faces")
.setReturnFaceId(True)
.setReturnFaceLandmarks(False)
.setReturnFaceAttributes([]))
faceIdDF = detector.transform(df).select("detected_faces").select(col("detected_faces").getItem(0).getItem("faceId").alias("id"))
faceIds = [row.asDict()['id'] for row in faceIdDF.collect()]
group = (GroupFaces()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("grouping")
.setFaceIds(faceIds))
group.transform(faceIdDF).show()
import com.microsoft.azure.synapse.ml.services.face.{DetectFace, GroupFaces}
import spark.implicits._
import org.apache.spark.sql.functions.col
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg"
).toDF("url")
val detector = (new DetectFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("detected_faces")
.setReturnFaceId(true)
.setReturnFaceLandmarks(false)
.setReturnFaceAttributes(Seq()))
val faceIdDF = (detector.transform(df)
.select(col("detected_faces").getItem(0).getItem("faceId").alias("id"))
.cache())
val faceIds = faceIdDF.collect().map(row => row.getAs[String]("id"))
val group = (new GroupFaces()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("grouping")
.setFaceIds(faceIds))
group.transform(faceIdDF).show()
Python API: GroupFaces | Scala API: GroupFaces | Source: GroupFaces |
IdentifyFaces
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
pgId = "PUT_YOUR_PERSON_GROUP_ID"
identifyFaces = (IdentifyFaces()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setFaceIdsCol("faces")
.setPersonGroupId(pgId)
.setOutputCol("identified_faces"))
import com.microsoft.azure.synapse.ml.services.face.IdentifyFaces
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val pgId = "PUT_YOUR_PERSON_GROUP_ID"
val identifyFaces = (new IdentifyFaces()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setFaceIdsCol("faces")
.setPersonGroupId(pgId)
.setOutputCol("identified_faces"))
Python API: IdentifyFaces | Scala API: IdentifyFaces | Source: IdentifyFaces |
VerifyFaces
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.sql.functions import col, lit
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",),
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",),
("https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg",)
], ["url"])
detector = (DetectFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("detected_faces")
.setReturnFaceId(True)
.setReturnFaceLandmarks(False)
.setReturnFaceAttributes([]))
faceIdDF = detector.transform(df).select("detected_faces").select(col("detected_faces").getItem(0).getItem("faceId").alias("faceId1"))
faceIdDF2 = faceIdDF.withColumn("faceId2", lit(faceIdDF.take(1)[0].asDict()['faceId1']))
verify = (VerifyFaces()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("same")
.setFaceId1Col("faceId1")
.setFaceId2Col("faceId2"))
verify.transform(faceIdDF2).show()
import com.microsoft.azure.synapse.ml.services.face.{DetectFace, VerifyFaces}
import spark.implicits._
import org.apache.spark.sql.functions.{col, lit}
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test1.jpg",
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test2.jpg",
"https://mmlspark.blob.core.windows.net/datasets/DSIR/test3.jpg"
).toDF("url")
val detector = (new DetectFace()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setImageUrlCol("url")
.setOutputCol("detected_faces")
.setReturnFaceId(true)
.setReturnFaceLandmarks(false)
.setReturnFaceAttributes(Seq()))
val faceIdDF = (detector.transform(df)
.select(col("detected_faces").getItem(0).getItem("faceId").alias("faceId1"))
.cache())
val faceIdDF2 = faceIdDF.withColumn("faceId2", lit(faceIdDF.take(1).head.getString(0)))
val verify = (new VerifyFaces()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("same")
.setFaceId1Col("faceId1")
.setFaceId2Col("faceId2"))
verify.transform(faceIdDF2).show()
Python API: VerifyFaces | Scala API: VerifyFaces | Source: VerifyFaces |
Speech To Text
SpeechToText
- Python
- Scala
import os
import requests
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
link = "https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav"
audioBytes = requests.get(link).content
df = spark.createDataFrame([(audioBytes,)], ["audio"])
stt = (SpeechToText()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("text")
.setAudioDataCol("audio")
.setLanguage("en-US")
.setFormat("simple"))
stt.transform(df).show()
import com.microsoft.azure.synapse.ml.services.speech.SpeechToText
import org.apache.commons.compress.utils.IOUtils
import spark.implicits._
import java.net.URL
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val audioBytes = IOUtils.toByteArray(new URL("https://mmlspark.blob.core.windows.net/datasets/Speech/test1.wav").openStream())
val df = Seq(
Tuple1(audioBytes)
).toDF("audio")
val stt = (new SpeechToText()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("text")
.setAudioDataCol("audio")
.setLanguage("en-US")
.setFormat("simple"))
stt.transform(df).show()
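Instead of downloading bytes with requests, audio files can also be read directly with Spark's binaryFile source. A Python sketch with a placeholder path:
audio_df = (spark.read.format("binaryFile")
            .load("/tmp/audio/*.wav")  # placeholder: a directory of .wav files
            .selectExpr("content as audio"))
stt.transform(audio_df).show()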
Python API: SpeechToText | Scala API: SpeechToText | Source: SpeechToText |
SpeechToTextSDK
- Python
- Scala
import os
from synapse.ml.services import *
cognitiveKey = os.environ.get("COGNITIVE_API_KEY", getSecret("cognitive-api-key"))
df = spark.createDataFrame([("https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav",)
], ["url"])
speech_to_text = (SpeechToTextSDK()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("text")
.setAudioDataCol("url")
.setLanguage("en-US")
.setProfanity("Masked"))
speech_to_text.transform(df).show()
import com.microsoft.azure.synapse.ml.services.speech.SpeechToTextSDK
import spark.implicits._
val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", "")
val df = Seq(
"https://mmlspark.blob.core.windows.net/datasets/Speech/audio2.wav"
).toDF("url")
val speech_to_text = (new SpeechToTextSDK()
.setSubscriptionKey(cognitiveKey)
.setLocation("eastus")
.setOutputCol("text")
.setAudioDataCol("url")
.setLanguage("en-US")
.setProfanity("Masked"))
speech_to_text.transform(df).show()
Python API: SpeechToTextSDK | Scala API: SpeechToTextSDK | Source: SpeechToTextSDK |
Azure Search
AzureSearch
- Python
- Scala
import os
from synapse.ml.services import *
azureSearchKey = os.environ.get("AZURE_SEARCH_KEY", getSecret("azure-search-key"))
testServiceName = "mmlspark-azure-search"
indexName = "test-website"
def createSimpleIndexJson(indexName):
json_str = """
{
"name": "%s",
"fields": [
{
"name": "id",
"type": "Edm.String",
"key": true,
"facetable": false
},
{
"name": "fileName",
"type": "Edm.String",
"searchable": false,
"sortable": false,
"facetable": false
},
{
"name": "text",
"type": "Edm.String",
"filterable": false,
"sortable": false,
"facetable": false
}
]
}
"""
return json_str % indexName
df = (spark.createDataFrame([
("upload", "0", "file0", "text0"),
("upload", "1", "file1", "text1"),
("upload", "2", "file2", "text2"),
("upload", "3", "file3", "text3")
], ["searchAction", "id", "fileName", "text"]))
ad = (AddDocuments()
.setSubscriptionKey(azureSearchKey)
.setServiceName(testServiceName)
.setOutputCol("out")
.setErrorCol("err")
.setIndexName(indexName)
.setActionCol("searchAction"))
ad.transform(df).show()
AzureSearchWriter.writeToAzureSearch(df,
subscriptionKey=azureSearchKey,
actionCol="searchAction",
serviceName=testServiceName,
indexJson=createSimpleIndexJson(indexName))
import com.microsoft.azure.synapse.ml.services.search.{AddDocuments, AzureSearchWriter}
import spark.implicits._
val azureSearchKey = sys.env.getOrElse("AZURE_SEARCH_KEY", "")
val testServiceName = "mmlspark-azure-search"
val indexName = "test-website"
def createSimpleIndexJson(indexName: String) = {
s"""
|{
| "name": "$indexName",
| "fields": [
| {
| "name": "id",
| "type": "Edm.String",
| "key": true,
| "facetable": false
| },
| {
| "name": "fileName",
| "type": "Edm.String",
| "searchable": false,
| "sortable": false,
| "facetable": false
| },
| {
| "name": "text",
| "type": "Edm.String",
| "filterable": false,
| "sortable": false,
| "facetable": false
| }
| ]
| }
""".stripMargin
}
val df = ((0 until 4)
.map(i => ("upload", s"$i", s"file$i", s"text$i"))
.toDF("searchAction", "id", "fileName", "text"))
val ad = (new AddDocuments()
.setSubscriptionKey(azureSearchKey)
.setServiceName(testServiceName)
.setOutputCol("out")
.setErrorCol("err")
.setIndexName(indexName)
.setActionCol("searchAction"))
ad.transform(df).show()
AzureSearchWriter.write(df,
Map("subscriptionKey" -> azureSearchKey,
"actionCol" -> "searchAction",
"serviceName" -> testServiceName,
"indexJson" -> createSimpleIndexJson(indexName)))
Python API: AzureSearch | Scala API: AzureSearch | Source: AzureSearch |
Bing Image Search
BingImageSearch
- Python
- Scala
import os
from synapse.ml.services import *
from pyspark.ml import PipelineModel
bingSearchKey = os.environ.get("BING_SEARCH_KEY", getSecret("bing-search-key"))
# Number of images Bing will return per query
imgsPerBatch = 10
# A list of offsets, used to page into the search results
offsets = [(i*imgsPerBatch,) for i in range(100)]
# Since web content is our data, we create a dataframe with options on that data: offsets
bingParameters = spark.createDataFrame(offsets, ["offset"])
# Run the Bing Image Search service with our text query
bingSearch = (BingImageSearch()
.setSubscriptionKey(bingSearchKey)
.setOffsetCol("offset")
.setQuery("Martin Luther King Jr. quotes")
.setCount(imgsPerBatch)
.setOutputCol("images"))
# Transformer that extracts and flattens the richly structured output of Bing Image Search into a simple URL column
getUrls = BingImageSearch.getUrlTransformer("images", "url")
# This displays the full results returned
bingSearch.transform(bingParameters).show()
# Since we have two services, they are put into a pipeline
pipeline = PipelineModel(stages=[bingSearch, getUrls])
# Show the results of your search: image URLs
pipeline.transform(bingParameters).show()
import com.microsoft.azure.synapse.ml.services.bing.BingImageSearch
import spark.implicits._
val bingSearchKey = sys.env.getOrElse("BING_SEARCH_KEY", "")
// Number of images Bing will return per query
val imgsPerBatch = 10
// A list of offsets, used to page into the search results
val offsets = (0 until 100).map(i => i * imgsPerBatch)
// Since web content is our data, we create a dataframe with options on that data: offsets
val bingParameters = Seq(offsets).toDF("offset")
// Run the Bing Image Search service with our text query
val bingSearch = (new BingImageSearch()
.setSubscriptionKey(bingSearchKey)
.setOffsetCol("offset")
.setQuery("Martin Luther King Jr. quotes")
.setCount(imgsPerBatch)
.setOutputCol("images"))
// Transformer that extracts and flattens the richly structured output of Bing Image Search into a simple URL column
val getUrls = BingImageSearch.getUrlTransformer("images", "url")
// This displays the full results returned
bingSearch.transform(bingParameters).show()
// Show the results of your search: image URLs
getUrls.transform(bingSearch.transform(bingParameters)).show()
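The found URLs can also be fetched as image bytes with the downloadFromUrls helper and chained into the same pipeline. A Python sketch; the concurrency (4) and timeout (10000 ms) values here are illustrative:
from pyspark.ml import PipelineModel
getImages = BingImageSearch.downloadFromUrls("url", "image", 4, 10000)
imagePipeline = PipelineModel(stages=[bingSearch, getUrls, getImages])
imagePipeline.transform(bingParameters).show()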
Python API: BingImageSearch | Scala API: BingImageSearch | Source: BingImageSearch |